library(readxl) # read xlsx datalibrary(dplyr) # data handlinglibrary(tidyr) # data transformationlibrary(ggplot2) # data visualizationlibrary(patchwork) # join ggplot graphicslibrary(ggpubr) # ggplot handlinglibrary(kableExtra) # print tableslibrary(mgcv) # regression model estimationlibrary(APCtools) # regression model visualizationlibrary(plotROC) # ROC curve and AUC calculation# set global ggplot themetheme_set(theme_minimal() +theme(plot.title =element_text(hjust =0.5),plot.background =element_rect(fill ="white", color ="white"),panel.background =element_rect(fill ="white", color ="white")))
Code
# colors for the main areas of analysiscol_vector <-c("knowledge"="#2F9D64","beliefs"="#00868B","barriers"="#0062A3")
Data preparation
Code
# read datadat <-read_excel("../data/Plant-Based Survey - Registered Dietitians_V1.0 .xlsx",sheet ="Combined", skip =2, n_max =335)# read lookup table from the first two rows in the Excel filedat_lookup <-read_excel("../data/Plant-Based Survey - Registered Dietitians_V1.0 .xlsx",sheet ="Combined", n_max =2)
Outlier handling
One person (ID 135) has age value 2. Since all other responses by this person seem plausible we assume this was a typo and set the person’s age value to missing.
Code
dat$`What is your age (in years)?`[dat$`What is your age (in years)?`==2] <-NA
# define a vector of the subspecialty areas that we focus onsubspecialties_varVector <-c("weight management"="dieteticsSubspecialty_obesityOrWeightManagement","gastroenterology"="dieteticsSubspecialty_gastroenterology","diabetes"="dieteticsSubspecialty_diabetes","paediatrics"="dieteticsSubspecialty_paediatrics","elderly care"="dieteticsSubspecialty_elderly","oncology"="dieteticsSubspecialty_oncology","eating disorders"="dieteticsSubspecialty_psychEating")
Code
# divide age and years of practice by 10, to facilitate the interpretation of regression coefficientsdat <- dat %>%mutate(age_10yearUnits = age /10,dietitian_forHowLong_10yearUnits = dietitian_forHowLong /10)
Description baseline demographics
For the diet variable, ‘Flexitarian’ and ‘Pescetarian’ are combined into one group.
For all of the following baseline demographics a hypothesis test is calculated to test for potential differences between dietitians’ diets, and the respective p-value is reported. For metric variables (age and years of practice) an ANOVA is calculated, for categorical variables a chi-squared test of independence is calculated.
For the diet variable, ‘Flexitarian’ and ‘Pescetarian’ are combined into one group.
Code
# test calculationm_age <-lm(age ~ diet, data = dat_descr)p_age <-anova(m_age)$`Pr(>F)`[1]p_edu <-chisq.test(x = dat_descr$diet,y = dat_descr$education)$p.valuem_ypr <-lm(dietitian_forHowLong ~ diet, data = dat_descr)p_ypr <-anova(m_ypr)$`Pr(>F)`[1]# Note: both the 'area of work' and 'area of subspecialty' are multiple choice items, so we first calculate a contingency table on which we then run the test.# Also, only selected areas are evaluated for bothtab_workArea <- dat_descr %>%group_by(diet) %>%summarize(n_hospital =sum(dieteticsArea_hospital =="yes"),n_primCareORcommunity =sum((dieteticsArea_primaryCare =="yes") | (dieteticsArea_community =="yes")),n_private =sum(dieteticsArea_privatePractice =="yes"),n_academia =sum(dieteticsArea_academia =="yes"),n_publicHealth =sum(dieteticsArea_publicHealth =="yes")) %>%select(-diet)p_workArea <-chisq.test(tab_workArea)$p.valuetab_subsp <- dat_descr %>%group_by(diet) %>%summarize(n_obesity =sum(dieteticsSubspecialty_obesityOrWeightManagement =="yes"),n_diabetes =sum(dieteticsSubspecialty_diabetes =="yes"),n_gastroen =sum(dieteticsSubspecialty_gastroenterology =="yes"),n_paediatr =sum(dieteticsSubspecialty_paediatrics =="yes"),n_elderly =sum(dieteticsSubspecialty_elderly =="yes"),n_oncology =sum(dieteticsSubspecialty_oncology =="yes"),n_eatingDisorders =sum(dieteticsSubspecialty_psychEating =="yes")) %>%select(-diet)p_subsp <-chisq.test(tab_subsp)$p.value
test between diet and sex: not tested due to too small n
test between diet and age: 0.0442
test between diet and level of education: 0.4079
test between diet and years of working as a dietitian: 0.088
test between diet and area of work: 0.056
test between diet and area of subspecialty: 0.5155
Regression model preparation
Exclude purely academic dietitians
6 dietitians have a purely academic area of work. These dietitians will be excluded from all following analyses since focus is on practitioning dietitians.
Code
dat <- dat %>%filter(dietetics_area !="Academia;")
Sensitivity analysis: Exclusion of new dietitians
As a sensitivity analysis we report the result figures in the supplementary materials of the publication for the case when excluding all newly practitioning dietitians (i.e., dietitians with less than 3 years of practical experience) from the estimation. In this way we can check if the inclusion or exclusion of this group influences the study results to a relevant extent.
By default, all of the following analyses are calculated without excluding this group of dietitians. If you want to exclude them and thus run the sensitivity analysis, simply uncomment the following line of code and re-run all of the following analyses.
Code
# dat <- dat %>% filter(dietitian_forHowLong >= 3)
Check for multicollinearities between subspecialties
We focus on six subspecialties in all analyses. As can be seen in the following matrix, there is no problematic multicollinearity present between them:
# helper function for model estimation# - dat: dataset# - y: character name of the response variable# The function returns a list of the model results and the AUC valueestimate_logitModel <-function(dat, y) {# rename response variable, for easier handlingcolnames(dat)[colnames(dat) == y] <-"y"# model estimation model <-gam(y ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education + education_WFPBeducation_num,family =binomial(link ="logit"),data = dat)# prepare the results table results_tab <-summary(model)$p.table %>%as.data.frame() %>%mutate(parameter =row.names(.)) %>%rename(estimate ="Estimate",se ="Std. Error",pvalue ="Pr(>|z|)") %>%mutate(CI_lower = estimate -qnorm(.975) * se,CI_upper = estimate +qnorm(.975) * se,exp_estimate =exp(estimate),exp_CIlower =exp(CI_lower),exp_CIupper =exp(CI_upper),pvalue =case_when(pvalue < .0001~"<.0001",TRUE~as.character(round(pvalue, 4))),param_group =case_when(grepl("Subspecialty", parameter) ~"area of specialty",grepl("forHowLong", parameter) ~"years of practice",grepl("WFPBeducation", parameter) ~"WFPB education",grepl("education", parameter) ~"education"),param_group =factor(param_group, levels =c("area of specialty", "years of practice", "education", "WFPB education")),parameter =gsub(parameter, pattern ="dieteticsSubspecialty_", replacement ="")) %>%select(param_group, parameter, exp_estimate, exp_CIlower, exp_CIupper, pvalue)row.names(results_tab) <-NULL# The model's goodness-of-fit is determined by calculating the AUC (Area Under the Curve).# Therefore, the dataset is first randomly split into a training dataset (80% of the data)# and a test dataset (20% of the data).# The regression model is then re-estimated on the training data,# and the AUC is calculated on the test data.# basis: split data into a training set (80% of the data) and a test set (20%)set.seed(2024) train_obs <-sample(1:nrow(dat), size =round(0.8*nrow(dat)), replace =FALSE) dat_train <- dat %>%slice(train_obs) dat_test <- dat %>%slice(-train_obs)# 1) estimate the model on the training set model_train <-gam(y ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education,family =binomial(link ="logit"),data = dat_train)# 2) calculate ROC curve and the corresponding AUC value pred <-data.frame("outcome"=as.integer(dat_test$y) -1,"pred"=predict(model_train, newdata = dat_test)) gg <-ggplot(pred, aes(m = pred, d = outcome)) +geom_roc() +ggtitle("ROC curve") auc <-calc_auc(gg)$AUC# return resultslist("results_tab"= results_tab,"AUC"= auc)}
Code
# helper function to create an effect plot, based on a regression results tableplot_logitRegressionResults <-function(results_tab) {# effect plot results_tab %>%filter(parameter !="(Intercept)") %>%ggplot(aes(x = parameter)) +geom_hline(yintercept =1, lty =2, col ="gray50") +geom_pointrange(aes(y = exp_estimate, ymin = exp_CIlower, ymax = exp_CIupper, col = param_group)) +scale_y_continuous("Odds Ratio, on log2-transformed scale", trans ="log2") +facet_wrap(~ param_group, scales ="free_x", nrow =1) +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title.x =element_blank(),legend.position ="none")}
Knowledge research questions
Plant-based: Diet definition
What is their understanding on what constitutes/defines a plant-based dietary pattern?
Code
dat %>%select(starts_with("diet_perceptionPBdiet_")) %>%pivot_longer(cols =everything()) %>%group_by(name) %>%mutate(sum_yes =sum(value =="yes")) %>%ungroup() %>%arrange(desc(sum_yes)) %>%mutate(name =gsub(pattern ="diet_perceptionPBdiet_", replacement ="", name),name =factor(name, levels =rev(unique(name)))) %>%ggplot(aes(y = name, fill = value)) +geom_bar(position ="fill") +scale_fill_manual(values =c("lightgray", unname(col_vector["knowledge"]))) +scale_x_continuous(labels = scales::label_percent()) +xlab("Agreement [%]") +ggtitle("Perception of what is a plant-based diet") +theme(axis.title.y =element_blank(),legend.title =element_blank(),legend.position ="bottom")
Plant-based: Risk-reduction regarding specific diseases
What is their understanding and knowledge on the role of plant-based diets in the context of reducing the risk of different non-communicable-related diseases?
For the WFPB diet, estimate one logistic regression model each for type-2 diabetes, cardiovascular disease and weight loss, on the question ‘yes, I would recommend this diet’ vs. ‘no, I wouldn’t’.
Note: Similar models are not estimated for the vegan diet, as of too small numbers (12 “yes” answers regarding type-2 diabetes, 16 “yes” answers regarding cardiovascular disease, 12 “yes” answers regarding weight loss)
Structure of every logistic regression model:
dependent variable: ‘I would recommend this diet for this disease’ (‘yes’ vs. ‘no’)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Model estimation
Code
model_T2D_WFPB <-estimate_logitModel(dat, y ="T2Diab_dietWFPB")model_CD_WFPB <-estimate_logitModel(dat, y ="cardiovascDisease_dietWFPB")model_WL_WFPB <-estimate_logitModel(dat, y ="weightLoss_dietWFPB")
# prepare the datasetplot_dat_list <-lapply(1:length(subspecialties_varVector), function(i) { subsp <-unname(subspecialties_varVector)[i] subsp_label <-names(subspecialties_varVector)[i]# filter the dataset for dietitians with the specific subspecialty dat_subsp <- datcolnames(dat_subsp)[colnames(dat_subsp) == subsp] <-"x" dat_subsp <- dat_subsp %>%filter(x =="yes") results_dat <- dat_subsp %>%select(starts_with("WFPB_micronutrientsOfConcern_")) %>%pivot_longer(cols =everything(), names_to ="micronutrient") %>%group_by(micronutrient) %>%summarize(share_yes =sum(value =="yes") /n()) %>%arrange(desc(share_yes)) %>%mutate(micronutrient =gsub(pattern ="WFPB_micronutrientsOfConcern_", replacement ="", x = micronutrient),micronutrient =factor(micronutrient, levels =rev(micronutrient)),subspecialty = subsp_label,share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(micronutrient =factor(micronutrient, levels =levels(plot_dat_nut$name)),subspecialty =factor(subspecialty, levels =names(subspecialties_varVector)))# heatmapplot_dat %>%ggplot(aes(x = subspecialty, y = micronutrient, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("WFPB micronutrients of concern\nby subspecialties") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$age_cat), function(x) {# filter the dataset for dietitians with the specific age group dat_age <- dat %>%filter(age_cat == x) results_dat <- dat_age %>%select(starts_with("WFPB_micronutrientsOfConcern_")) %>%pivot_longer(cols =everything(), names_to ="micronutrient") %>%group_by(micronutrient) %>%summarize(share_yes =sum(value =="yes") /n()) %>%arrange(desc(share_yes)) %>%mutate(micronutrient =gsub(pattern ="WFPB_micronutrientsOfConcern_", replacement ="", x = micronutrient),micronutrient =factor(micronutrient, levels =rev(micronutrient)),age_cat = x,share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(micronutrient =factor(micronutrient, levels =levels(plot_dat_nut$name)),age_cat =factor(age_cat, levels =levels(dat$age_cat))) # heatmapplot_dat %>%ggplot(aes(x = age_cat, y = micronutrient, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("WFPB micronutrients of concern\nby age groups") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$dietitian_forHowLong_cat), function(x) {# filter the dataset for dietitians with the specific age group dat_practice <- dat %>%filter(dietitian_forHowLong_cat == x) results_dat <- dat_practice %>%select(starts_with("WFPB_micronutrientsOfConcern_")) %>%pivot_longer(cols =everything(), names_to ="micronutrient") %>%group_by(micronutrient) %>%summarize(share_yes =sum(value =="yes") /n()) %>%arrange(desc(share_yes)) %>%mutate(micronutrient =gsub(pattern ="WFPB_micronutrientsOfConcern_", replacement ="", x = micronutrient),micronutrient =factor(micronutrient, levels =rev(micronutrient)),dietitian_forHowLong_cat = x,share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(micronutrient =factor(micronutrient, levels =levels(plot_dat_nut$name)),dietitian_forHowLong_cat =factor(dietitian_forHowLong_cat, levels =levels(dat$dietitian_forHowLong_cat))) # heatmapplot_dat %>%ggplot(aes(x = dietitian_forHowLong_cat, y = micronutrient, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("WFPB micronutrients of concern\nby dietitians' years of practice") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Model-based change
Individually for every nutrient apart from potassium (which has too few “yes” answers): Logistic regression on the question ‘yes, that’s a nutrient of concern’ vs. ‘no, it isn’t’
Structure of every logistic regression model:
dependent variable: ‘This is a WFPB micronutrient is of concern’ (‘yes’ vs. ‘no’)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Model estimation
Code
model_vitB12 <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_vitB12")model_iron <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_iron")model_calcium <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_calcium")model_iodine <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_iodine")model_LComg3 <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_LComega3")model_vitD <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_vitD")model_zinc <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_zinc")model_SComg3 <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_SComega3")model_folate <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_folate")model_selen <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_selenium")model_thiam <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_thiamine")model_choline <-estimate_logitModel(dat, y ="WFPB_micronutrientsOfConcern_choline")
dat %>%ggplot(aes(y ="", fill = plantProtein_isIncomplete)) +geom_bar(position ="fill") +scale_x_continuous("Relative frequency [%]", labels = scales::label_percent()) +scale_fill_brewer(type ="div", palette ="PRGn", direction =-1) +ggtitle("Plant proteins are an incomplete source of protein") +guides(fill =guide_legend(reverse =TRUE)) +theme(axis.title.y =element_blank(),legend.title =element_blank(),legend.position ="bottom")
Univariate description
by subspecialty, age, years of practice
Code
# prepare the datasetplot_dat_list <-lapply(1:length(subspecialties_varVector), function(i) { subsp <-unname(subspecialties_varVector)[i] subsp_label <-names(subspecialties_varVector)[i]# filter the dataset for dietitians with the specific subspecialty dat_subsp <- datcolnames(dat_subsp)[colnames(dat_subsp) == subsp] <-"x" dat_subsp <- dat_subsp %>%filter(x =="yes") results_dat <-data.frame(plantProtein_isIncomplete =levels(dat_subsp$plantProtein_isIncomplete),share_yes =prop.table(table(dat_subsp$plantProtein_isIncomplete)) %>% as.vector,subspecialty = subsp_label) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_proteinSubsp <- plot_dat_list %>%bind_rows() %>%mutate(subspecialty =factor(subspecialty, levels =names(subspecialties_varVector)),plantProtein_isIncomplete =factor(plantProtein_isIncomplete, levels =levels(dat$plantProtein_isIncomplete)))# heatmapplot_dat_proteinSubsp %>%ggplot(aes(x = subspecialty, y = plantProtein_isIncomplete, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("Plant proteins are an incomplete source of protein\nby subspecialties") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$age_cat), function(x) {# filter the dataset for dietitians with the specific age group dat_age <- dat %>%filter(age_cat == x) results_dat <-data.frame(plantProtein_isIncomplete =levels(dat_age$plantProtein_isIncomplete),share_yes =prop.table(table(dat_age$plantProtein_isIncomplete)) %>% as.vector,age_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_proteinAge <- plot_dat_list %>%bind_rows() %>%mutate(age_cat =factor(age_cat, levels =levels(dat$age_cat)),plantProtein_isIncomplete =factor(plantProtein_isIncomplete, levels =levels(dat$plantProtein_isIncomplete)))# heatmapplot_dat_proteinAge %>%ggplot(aes(x = age_cat, y = plantProtein_isIncomplete, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("Plant proteins are an incomplete source of protein\nby age groups") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$dietitian_forHowLong_cat), function(x) {# filter the dataset for dietitians with the specific years of practice dat_practice <- dat %>%filter(dietitian_forHowLong_cat == x) results_dat <-data.frame(plantProtein_isIncomplete =levels(dat_practice$plantProtein_isIncomplete),share_yes =prop.table(table(dat_practice$plantProtein_isIncomplete)) %>% as.vector,dietitian_forHowLong_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_proteinPractice <- plot_dat_list %>%bind_rows() %>%mutate(dietitian_forHowLong_cat =factor(dietitian_forHowLong_cat, levels =levels(dat$dietitian_forHowLong_cat)),plantProtein_isIncomplete =factor(plantProtein_isIncomplete, levels =levels(dat$plantProtein_isIncomplete)))# heatmapplot_dat_proteinPractice %>%ggplot(aes(x = dietitian_forHowLong_cat, y = plantProtein_isIncomplete, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["knowledge"]),labels = scales::label_percent()) +ggtitle("Plant proteins are an incomplete source of protein\nby dietitians' years of practice") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Model-based change
Estimation of a Logistic regression model:
dependent variable: binarized incomplete plant protein item (‘Yes, (strongly) agree’ vs. ‘Not sure / No, (strongly) disagree’)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
The model’s goodness-of-fit is determined by calculating the AUC (Area Under the Curve). Therefore, the dataset is first randomly split into a training dataset (80% of the data) and a test dataset (20% of the data). The regression model is then re-estimated on the training data, and the AUC is calculated on the test data.
Code
# basis: split data into a training set (80% of the data) and a test set (20%)set.seed(2024)train_obs <-sample(1:nrow(dat), size =round(0.8*nrow(dat)), replace =FALSE)dat_train <- dat %>%slice(train_obs)dat_test <- dat %>%slice(-train_obs)
Code
# 1) estimate the model on the training setmodel_train <-gam(plantProteinIsIncomplete_binary ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education,family =binomial(link ="logit"),data = dat_train)# 2) calculate ROC curve and the corresponding AUC valuepred <-data.frame("outcome"=as.integer(dat_test$plantProteinIsIncomplete_binary) -1,"pred"=predict(model_train, newdata = dat_test))gg <-ggplot(pred, aes(m = pred, d = outcome)) +geom_roc() +ggtitle("ROC curve")auc <-calc_auc(gg)$AUC
AUC: 0.64
WFPB: Life-cycle knowledge score
Does knowledge of WFPB diets in the lifecycle change or is determined by i) age or ii) years practising as a dietitian?
We define a life-cycle WFPB knowledge score based on these five items:
A well-planned whole food plant-based diet is suitable and healthy for all life stages.
A well-planned whole food plant-based diet is suitable in all stages of pregnancy and lactation.
It is possible for children (infants and toddlers) to meet all nutritional requirements on a well-planned whole food plant-based diet?
It is possible for adolescents and teenagers to meet all nutritional requirements on a well-planned whole food 5. plant-based diet?
It is difficult for older persons to achieve their energy and protein requirements on a well-planned, whole food plant-based diet?
Based on these five items, the knowledge score for every person is calculated by taking the average of a person’s responses to these items (with the lowest response category ‘No, strongly disagree’ encoded as 0, and the highest ‘Yes, strongly agree’ encoded as 1). Categories ‘Disagree’, ‘Agree’ and ‘Unsure’ are set to values ‘0’ (since it’s a wrong answer), ‘0.75’, and ‘0.75 / 2 = 0.375’ (to reflect the middle between agree and disagree), respectively.
Since item number five is encoded in different direction (agreement = negative view of WFPB diets) compared to items one to four (agreement = positive view of WFPB diets), the direction of the Likert scale of item number five is turned around before calculating the knowledge score.
dat %>%ggplot(aes(x = dietitian_forHowLong_cat, y = knowledge_score_lifeCycle)) +geom_boxplot(color = col_vector["knowledge"]) +ggtitle("WFPB: Life-cycle knowledge score\nby years of practice") +ylab("knowledge score") +theme(axis.title.x =element_blank(),panel.grid.major.x =element_blank())
Model-based change
Estimation of a Gaussian regression model:
dependent variable: Life-cycle knowledge score (quasi-metric, assumed as metric here)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Does knowledge about critical micronutrients in WFPB diets change or is determined by i) age or ii) years practising as a dietitian?
We define a critical micronutrient WFPB knowledge score based on these seven micronutrients:
Vitamin B12
Calcium
Vitamin D
Zinc
Long-chain omega 3
Iodine
Selenium
Based on these seven micronutrients, the knowledge score for every person is calculated by taking the average of a person’s responses to these items. The obtained score then lies between zero and one and reflects the share of the above micronutrients which were correctly answered as being critical.
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Does knowledge about uncritical micronutrients in WFPB diets change or is determined by i) age or ii) years practising as a dietitian?
We define an uncritical micronutrient WFPB knowledge score based on these six micronutrients:
Iron
Folic acid
Choline
Thiamine
Potassium
Short-chain omega 3
Based on these six micronutrients, the knowledge score for every person is calculated by taking the average of a person’s responses to these items. This score is then inverted (i.e., ‘new_score = 1 - old_score’), s.t. value 1 reflects all correct answers, and value 0 all wrong answers. The obtained score lies between zero and one and reflects the share of the above micronutrients which were correctly answered as not being critical.
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Does knowledge about which disease’s conditions improve through WFPB diets change or is determined by i) age or ii) years practising as a dietitian?
We define a improved diseases WFPB knowledge score based on these 13 diseases:
Heart disease
High cholesterol
Hypertension
Diabetes
Obesity
Stroke
Chronic kidney disease
Cancers
Fatty liver disease
Alzeimers dementia
Vascular Dementia
Depression
Inflammatory disease
Based on these 13 diseases, the knowledge score for every person is calculated by taking the average of a person’s responses to these items. The obtained score then lies between zero and one and reflects the share of the above diseases which were correctly answered as benefiting by a WFPB diet through improved conditions.
dat %>%ggplot(aes(x = dietitian_forHowLong_cat, y = knowledge_score_diseases)) +geom_boxplot(color = col_vector["knowledge"]) +ggtitle("WFPB: Improved diseases knowledge score\nby years of practice") +ylab("knowledge score") +theme(axis.title.x =element_blank(),panel.grid.major.x =element_blank())
Model-based change
Estimation of a Gaussian regression model:
dependent variable: Diseases knowledge score (quasi-metric, assumed as metric here)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
dat %>%ggplot(aes(y ="", fill = WFPB_isSustainableLongTerm)) +geom_bar(position ="fill") +scale_x_continuous("Relative frequency [%]", labels = scales::label_percent()) +scale_fill_brewer(type ="div", palette ="BrBG") +ggtitle("Long-term sustainability of WFPB diets") +guides(fill =guide_legend(reverse =TRUE)) +theme(axis.title.y =element_blank(),legend.title =element_blank(),legend.position ="bottom")
Univariate description
by subspecialty, age, years of practice, and personal dietary pattern.
Code
# prepare the datasetplot_dat_list <-lapply(1:length(subspecialties_varVector), function(i) { subsp <-unname(subspecialties_varVector)[i] subsp_label <-names(subspecialties_varVector)[i]# filter the dataset for dietitians with the specific subspecialty dat_subsp <- datcolnames(dat_subsp)[colnames(dat_subsp) == subsp] <-"x" dat_subsp <- dat_subsp %>%filter(x =="yes") results_dat <-data.frame(WFPB_isSustainableLongTerm =levels(dat_subsp$WFPB_isSustainableLongTerm),share_yes =prop.table(table(dat_subsp$WFPB_isSustainableLongTerm)) %>% as.vector,subspecialty = subsp_label) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_sustainability_sub <- plot_dat_list %>%bind_rows() %>%mutate(subspecialty =factor(subspecialty, levels =names(subspecialties_varVector)),WFPB_isSustainableLongTerm =factor(WFPB_isSustainableLongTerm, levels =levels(dat$WFPB_isSustainableLongTerm)))# heatmapplot_dat_sustainability_sub %>%ggplot(aes(x = subspecialty, y = WFPB_isSustainableLongTerm, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Long-term sustainability of WFPB diets\nby subspecialties") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$age_cat), function(x) {# filter the dataset for dietitians with the specific age group dat_age <- dat %>%filter(age_cat == x) results_dat <-data.frame(WFPB_isSustainableLongTerm =levels(dat_age$WFPB_isSustainableLongTerm),share_yes =prop.table(table(dat_age$WFPB_isSustainableLongTerm)) %>% as.vector,age_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_sustainability_age <- plot_dat_list %>%bind_rows() %>%mutate(age_cat =factor(age_cat, levels =levels(dat$age_cat)),WFPB_isSustainableLongTerm =factor(WFPB_isSustainableLongTerm, levels =levels(dat$WFPB_isSustainableLongTerm)))# heatmapplot_dat_sustainability_age %>%ggplot(aes(x = age_cat, y = WFPB_isSustainableLongTerm, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Long-term sustainability of WFPB diets\nby age groups") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$dietitian_forHowLong_cat), function(x) {# filter the dataset for dietitians with the specific years of practice dat_practice <- dat %>%filter(dietitian_forHowLong_cat == x) results_dat <-data.frame(WFPB_isSustainableLongTerm =levels(dat_practice$WFPB_isSustainableLongTerm),share_yes =prop.table(table(dat_practice$WFPB_isSustainableLongTerm)) %>% as.vector,dietitian_forHowLong_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_sustainability_years <- plot_dat_list %>%bind_rows() %>%mutate(dietitian_forHowLong_cat =factor(dietitian_forHowLong_cat, levels =levels(dat$dietitian_forHowLong_cat)),WFPB_isSustainableLongTerm =factor(WFPB_isSustainableLongTerm, levels =levels(dat$WFPB_isSustainableLongTerm)))# heatmapplot_dat_sustainability_years %>%ggplot(aes(x = dietitian_forHowLong_cat, y = WFPB_isSustainableLongTerm, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Long-term sustainability of WFPB diets\nby dietitians' years of practice") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$diet_personal), function(x) {# filter the dataset for dietitians with the specific diet dat_diet <- dat %>%filter(diet_personal == x) results_dat <-data.frame(WFPB_isSustainableLongTerm =levels(dat_diet$WFPB_isSustainableLongTerm),share_yes =prop.table(table(dat_diet$WFPB_isSustainableLongTerm)) %>% as.vector,diet_personal = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(diet_personal =factor(diet_personal, levels =levels(dat$diet_personal)),WFPB_isSustainableLongTerm =factor(WFPB_isSustainableLongTerm, levels =levels(dat$WFPB_isSustainableLongTerm)))# heatmapplot_dat %>%ggplot(aes(x = diet_personal, y = WFPB_isSustainableLongTerm, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Long-term sustainability of WFPB diets\nby dietitians' personal diet") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Model-based change
Estimation of a Logistic regression model:
dependent variable: binarized long-term sustainability of WFPB diets (‘(Strongly) Agree’ vs. ‘Not sure / (Strongly) Disagree’)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
The model’s goodness-of-fit is determined by calculating the AUC (Area Under the Curve). Therefore, the dataset is first randomly split into a training dataset (80% of the data) and a test dataset (20% of the data). The regression model is then re-estimated on the training data, and the AUC is calculated on the test data.
Code
# basis: split data into a training set (80% of the data) and a test set (20%)set.seed(2024)train_obs <-sample(1:nrow(dat), size =round(0.8*nrow(dat)), replace =FALSE)dat_train <- dat %>%slice(train_obs)dat_test <- dat %>%slice(-train_obs)
Code
# 1) estimate the model on the training setmodel_train <-gam(WFPBsustainability_binary ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education,family =binomial(link ="logit"),data = dat_train)# 2) calculate ROC curve and the corresponding AUC valuepred <-data.frame("outcome"=as.integer(dat_test$WFPBsustainability_binary) -1,"pred"=predict(model_train, newdata = dat_test))gg <-ggplot(pred, aes(m = pred, d = outcome)) +geom_roc() +ggtitle("ROC curve")auc <-calc_auc(gg)$AUC
AUC: 0.55
WFPB: Long-term adherence under co-morbidities
Do they believe clients with co-morbidities would adhere long-term to a WFPB diet?
Code
dat %>%ggplot(aes(y ="", fill = ChronicConditions_longTermWFPBadherence)) +geom_bar(position ="fill") +scale_x_continuous("Relative frequency [%]", labels = scales::label_percent()) +scale_fill_brewer(type ="div", palette ="BrBG") +ggtitle("Long-term adherence under co-morbidities") +guides(fill =guide_legend(reverse =TRUE)) +theme(axis.title.y =element_blank(),legend.title =element_blank(),legend.position ="bottom")
WFPB: Clients’ main motivation
What is the main motivation they have come across with their clients in adopting a WFPB diet?
How often would they recommend a WFPB diet in clinical practice?
Excluding 13 people from the following plots with value Not applicable.
Code
dat %>%filter(WFPBrecommendation_howOften !="Not applicable") %>%group_by(WFPBrecommendation_howOften) %>%summarize(n =n()) %>%mutate(rel_freq = n /sum(n)) %>%ggplot(aes(x = WFPBrecommendation_howOften, weight = rel_freq)) +geom_bar(fill = col_vector["beliefs"]) +scale_y_continuous(labels = scales::label_percent()) +ggtitle("How often would you recommend a WFPB diet to your patients?") +theme(axis.title =element_blank())
Univariate description
by subspecialty, age, years of practice.
Code
# prepare the datasetplot_dat_list <-lapply(1:length(subspecialties_varVector), function(i) { subsp <-unname(subspecialties_varVector)[i] subsp_label <-names(subspecialties_varVector)[i]# filter the dataset for dietitians with the specific subspecialty dat_subsp <- dat %>%filter(WFPBrecommendation_howOften !="Not applicable") %>%mutate(WFPBrecommendation_howOften =droplevels(WFPBrecommendation_howOften))colnames(dat_subsp)[colnames(dat_subsp) == subsp] <-"x" dat_subsp <- dat_subsp %>%filter(x =="yes") results_dat <-data.frame(WFPBrecommendation_howOften =levels(dat_subsp$WFPBrecommendation_howOften),share_yes =prop.table(table(dat_subsp$WFPBrecommendation_howOften)) %>% as.vector,subspecialty = subsp_label) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(subspecialty =factor(subspecialty, levels =names(subspecialties_varVector)),WFPBrecommendation_howOften =factor(WFPBrecommendation_howOften, levels =levels(dat$WFPBrecommendation_howOften)))# heatmapplot_dat %>%ggplot(aes(x = subspecialty, y = WFPBrecommendation_howOften, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Regularity of WFPB recommendation\nby subspecialties") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$age_cat), function(x) {# filter the dataset for dietitians with the specific age group dat_age <- dat %>%filter(WFPBrecommendation_howOften !="Not applicable") %>%mutate(WFPBrecommendation_howOften =droplevels(WFPBrecommendation_howOften)) %>%filter(age_cat == x) results_dat <-data.frame(WFPBrecommendation_howOften =levels(dat_age$WFPBrecommendation_howOften),share_yes =prop.table(table(dat_age$WFPBrecommendation_howOften)) %>% as.vector,age_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(age_cat =factor(age_cat, levels =levels(dat$age_cat)),WFPBrecommendation_howOften =factor(WFPBrecommendation_howOften, levels =levels(dat$WFPBrecommendation_howOften)))# heatmapplot_dat %>%ggplot(aes(x = age_cat, y = WFPBrecommendation_howOften, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Regularity of WFPB recommendation\nby age groups") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$dietitian_forHowLong_cat), function(x) {# filter the dataset for dietitians with the specific years of practice dat_practice <- dat %>%filter(WFPBrecommendation_howOften !="Not applicable") %>%mutate(WFPBrecommendation_howOften =droplevels(WFPBrecommendation_howOften)) %>%filter(dietitian_forHowLong_cat == x) results_dat <-data.frame(WFPBrecommendation_howOften =levels(dat_practice$WFPBrecommendation_howOften),share_yes =prop.table(table(dat_practice$WFPBrecommendation_howOften)) %>% as.vector,dietitian_forHowLong_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat <- plot_dat_list %>%bind_rows() %>%mutate(dietitian_forHowLong_cat =factor(dietitian_forHowLong_cat, levels =levels(dat$dietitian_forHowLong_cat)),WFPBrecommendation_howOften =factor(WFPBrecommendation_howOften, levels =levels(dat$WFPBrecommendation_howOften)))# heatmapplot_dat %>%ggplot(aes(x = dietitian_forHowLong_cat, y = WFPBrecommendation_howOften, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Regularity of WFPB recommendation\nby dietitians' years of practice") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Model-based change
Estimation of a Logistic regression model:
dependent variable: binarized regularity of WFPB recommendation (‘Sometimes / Often / Always’ vs. ‘Rarely / Never’)
independent variables: subspecialty (one dummy variable for each of the six subspecialties under focus), years of practice (in 10-year-units), education quality on WFPB diets (as numeric score with 0 = ‘No training’, 1 = ‘Very poor’ and 5 = ‘Excellent’)
further control variables: education, educational resources on WFPB diets (as numeric score with 0 = ‘Strongly disagree’ and 4 = ‘Strongly agree’), workplace support on advocating for WFPB diets (as numeric score with 0 = ‘Strongly unsupported’ and 4 = ‘Strongly supported’)
unused control variables: age (since strongly correlated with years of practice), gender (since dietitians are nearly exclusively female), personal diet (since the causality between a dietitian’s personal diet and the response variable is unclear)
Code
# calculate the response variabledat <- dat %>%mutate(WFPBrecommendation_binary =case_when(WFPBrecommendation_howOften %in%c("Sometimes","Often","Always") ~"Sometimes / Often / Always", WFPBrecommendation_howOften %in%c("Rarely","Never") ~"Rarely / Never",TRUE~NA_character_),WFPBrecommendation_binary =factor(WFPBrecommendation_binary, levels =c("Rarely / Never", "Sometimes / Often / Always")))# make numeric scores out of some control variablesdat <- dat %>%mutate(PBdiet_enoughEduResources_num =as.numeric(Pbdiet_enoughProperEducationalResources) -1,workplace_supportForWFPBadvocacy_num =as.numeric(workplace_supportForWFPBadvocacy) -1)# model estimationmodel <-gam(WFPBrecommendation_binary ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education + education_WFPBeducation_num + PBdiet_enoughEduResources_num + workplace_supportForWFPBadvocacy_num,family =binomial(link ="logit"),data = dat)# prepare the results tableresults_tab <-summary(model)$p.table %>%as.data.frame() %>%mutate(parameter =row.names(.)) %>%rename(estimate ="Estimate",se ="Std. Error",pvalue ="Pr(>|z|)") %>%mutate(CI_lower = estimate -qnorm(.975) * se,CI_upper = estimate +qnorm(.975) * se,exp_estimate =exp(estimate),exp_CIlower =exp(CI_lower),exp_CIupper =exp(CI_upper),pvalue =case_when(pvalue < .0001~"<.0001",TRUE~as.character(round(pvalue, 4))),param_group =case_when(grepl("Subspecialty", parameter) ~"area of specialty",grepl("forHowLong", parameter) ~"years of practice",grepl("_num", parameter) ~"barriers",grepl("education", parameter) ~"education"),param_group =factor(param_group, levels =c(" ", "years of practice", "education", "barriers")),parameter =gsub(parameter, pattern ="dieteticsSubspecialty_", replacement ="")) %>%select(param_group, parameter, exp_estimate, exp_CIlower, exp_CIupper, pvalue)row.names(results_tab) <-NULLresults_tab %>%mutate(exp_estimate =round(exp_estimate, 4),exp_CIlower =round(exp_CIlower, 4),exp_CIupper =round(exp_CIupper, 4)) %>%kable() %>%kable_styling()
param_group
parameter
exp_estimate
exp_CIlower
exp_CIupper
pvalue
NA
(Intercept)
0.1167
0.0469
0.2901
<.0001
NA
obesityOrWeightManagementyes
1.8364
1.0206
3.3044
0.0426
NA
gastroenterologyyes
1.3231
0.6915
2.5314
0.3977
NA
diabetesyes
1.1655
0.5967
2.2765
0.6539
NA
paediatricsyes
0.7996
0.4083
1.5657
0.5142
NA
elderlyyes
0.9868
0.4907
1.9845
0.9702
NA
oncologyyes
0.5132
0.2211
1.1913
0.1205
NA
psychEatingyes
1.2272
0.5556
2.7105
0.6126
years of practice
dietitian_forHowLong_10yearUnits
1.2509
0.9771
1.6013
0.0757
education
educationPostgraduate degree
2.4540
1.4715
4.0926
6e-04
education
educationPhD
4.4036
1.4067
13.7854
0.0109
barriers
education_WFPBeducation_num
0.9333
0.7401
1.1768
0.5594
barriers
PBdiet_enoughEduResources_num
1.1198
0.8532
1.4697
0.4147
barriers
workplace_supportForWFPBadvocacy_num
1.3925
1.0849
1.7874
0.0093
Code
# effect plotresults_tab %>%filter(parameter !="(Intercept)") %>%filter(exp_CIupper <1000) %>%# hide the 'WFPBeducation = excellent' subgroup as of only one observation in itggplot(aes(x = parameter)) +geom_hline(yintercept =1, lty =2, col ="gray50") +geom_pointrange(aes(y = exp_estimate, ymin = exp_CIlower, ymax = exp_CIupper, col = param_group)) +scale_y_continuous("Odds Ratio, on log2-transformed scale", trans ="log2") +facet_wrap(~ param_group, scales ="free_x", nrow =1) +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title.x =element_blank(),legend.position ="none")
The model’s goodness-of-fit is determined by calculating the AUC (Area Under the Curve). Therefore, the dataset is first randomly split into a training dataset (80% of the data) and a test dataset (20% of the data). The regression model is then re-estimated on the training data, and the AUC is calculated on the test data.
Code
# basis: split data into a training set (80% of the data) and a test set (20%)set.seed(2024)train_obs <-sample(1:nrow(dat), size =round(0.8*nrow(dat)), replace =FALSE)dat_train <- dat %>%slice(train_obs)dat_test <- dat %>%slice(-train_obs)
Code
# 1) estimate the model on the training setmodel_train <-gam(WFPBrecommendation_binary ~ dieteticsSubspecialty_obesityOrWeightManagement + dieteticsSubspecialty_gastroenterology + dieteticsSubspecialty_diabetes + dieteticsSubspecialty_paediatrics + dieteticsSubspecialty_elderly + dieteticsSubspecialty_oncology + dieteticsSubspecialty_psychEating + dietitian_forHowLong_10yearUnits + education,family =binomial(link ="logit"),data = dat_train)# 2) calculate ROC curve and the corresponding AUC valuepred <-data.frame("outcome"=as.integer(dat_test$WFPBrecommendation_binary) -1,"pred"=predict(model_train, newdata = dat_test))gg <-ggplot(pred, aes(m = pred, d = outcome)) +geom_roc() +ggtitle("ROC curve")auc <-calc_auc(gg)$AUC
AUC: 0.61
WFPB: Availability in hospitals
Do they believe that WFPB diets should be integrated into hospitals or other healthcare facilities?
Code
dat %>%ggplot(aes(y ="", fill = withinHealthcare_PbdietShouldBeOption)) +geom_bar(position ="fill") +scale_x_continuous("Relative frequency [%]", labels = scales::label_percent()) +scale_fill_brewer(type ="div", palette ="BrBG") +guides(fill =guide_legend(reverse =TRUE)) +ggtitle("Should WFPB diets be available in hospitals?") +theme(axis.title.y =element_blank(),legend.title =element_blank(),legend.position ="bottom")
Univariate description
by subspecialty and years of practice
Code
# prepare the datasetplot_dat_list <-lapply(1:length(subspecialties_varVector), function(i) { subsp <-unname(subspecialties_varVector)[i] subsp_label <-names(subspecialties_varVector)[i]# filter the dataset for dietitians with the specific subspecialty dat_subsp <- datcolnames(dat_subsp)[colnames(dat_subsp) == subsp] <-"x" dat_subsp <- dat_subsp %>%filter(x =="yes") results_dat <-data.frame(withinHealthcare_PbdietShouldBeOption =levels(dat_subsp$withinHealthcare_PbdietShouldBeOption),share_yes =prop.table(table(dat_subsp$withinHealthcare_PbdietShouldBeOption)) %>% as.vector,subspecialty = subsp_label) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_hospitals_sub <- plot_dat_list %>%bind_rows() %>%mutate(subspecialty =factor(subspecialty, levels =names(subspecialties_varVector)),withinHealthcare_PbdietShouldBeOption =factor(withinHealthcare_PbdietShouldBeOption, levels =levels(dat$withinHealthcare_PbdietShouldBeOption)))# heatmapplot_dat_hospitals_sub %>%ggplot(aes(x = subspecialty, y = withinHealthcare_PbdietShouldBeOption, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Should WFPB diets be available in hospitals?\nby subspecialties") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Code
# prepare the datasetplot_dat_list <-lapply(levels(dat$dietitian_forHowLong_cat), function(x) {# filter the dataset for dietitians with the specific years of practice dat_practice <- dat %>%filter(dietitian_forHowLong_cat == x) results_dat <-data.frame(withinHealthcare_PbdietShouldBeOption =levels(dat_practice$withinHealthcare_PbdietShouldBeOption),share_yes =prop.table(table(dat_practice$withinHealthcare_PbdietShouldBeOption)) %>% as.vector,dietitian_forHowLong_cat = x) %>%mutate(share_label =paste0(round(100* share_yes), "%"))return(results_dat)})plot_dat_hospitals_years <- plot_dat_list %>%bind_rows() %>%mutate(dietitian_forHowLong_cat =factor(dietitian_forHowLong_cat, levels =levels(dat$dietitian_forHowLong_cat)),withinHealthcare_PbdietShouldBeOption =factor(withinHealthcare_PbdietShouldBeOption, levels =levels(dat$withinHealthcare_PbdietShouldBeOption)))# heatmapplot_dat_hospitals_years %>%ggplot(aes(x = dietitian_forHowLong_cat, y = withinHealthcare_PbdietShouldBeOption, fill = share_yes)) +geom_tile() +geom_text(aes(label = share_label), color ="gray40", size =3) +scale_fill_gradient("Agreement [%]", low ="white", high =unname(col_vector["beliefs"]),labels = scales::label_percent()) +ggtitle("Should WFPB diets be available in hospitals?\nby dietitians' years of practice") +theme(axis.text.x =element_text(angle =45, hjust =1),axis.title =element_blank())
Barriers research questions
WFPB: Personal barriers
Those that have chosen not to follow a WFPB diet personally, what was the main reasons/barriers in preventing them from adopting one?
Figure 3: Risk reduction for the three different diseases
Code
update_dietLabels <-function(dat) { dat %>%arrange(name) %>%mutate(name =case_when(name =="Medit"~"Mediterranean", name =="National"~"National guidelines", name =="LowCarbs"~"Low carb", name =="LowEnergy"~"Low energy", name =="TDR"~"Total diet replacement", name =="LowFat"~"Low fat", name =="PDR"~"Partial diet replacement", name =="HighProtein"~"High protein", name =="Keto"~"Ketogenic",TRUE~ name),name =factor(name, levels =unique(as.character(name))))}